{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def createVocabList(dataSet):\n",
    "    vocabSet = set([])  #create empty set\n",
    "    for document in dataSet:\n",
    "        vocabSet = vocabSet | set(document) #union of the two sets\n",
    "    return list(vocabSet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def createVocabList2(dataSet):\n",
    "    vocabSet = set(dataSet)  #create empty set\n",
    "    return list(vocabSet)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'1'}\n",
      "{'3', '1'}\n",
      "{'3', '5', '1'}\n",
      "{'3', '7', '5', '1'}\n",
      "{'3', '9', '1', '7', '5'}\n",
      "{'3', '9', '1', '7', '5'}\n",
      "['3', '9', '1', '7', '5']\n",
      "['9', '3', '11', '1', '7', '5']\n"
     ]
    }
   ],
   "source": [
    "print(createVocabList(['1','3','5','7','9','11']))\n",
    "print(createVocabList2(['1','3','5','7','9','11']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'1'}\n"
     ]
    }
   ],
   "source": [
    "a = set(['1','3','5','7','9']) | set('11')\n",
    "print(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'9', '3', '11', '1', '7', '5'}\n"
     ]
    }
   ],
   "source": [
    "a = set(['1','3','5','7','9','11'])\n",
    "print(a)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'1'}\n"
     ]
    }
   ],
   "source": [
    "print(set('11'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "postingList = [['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],\n",
    "                 ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],\n",
    "                 ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him'],\n",
    "                 ['stop', 'posting', 'stupid', 'worthless', 'garbage'],\n",
    "                 ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],\n",
    "                 ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['not', 'worthless', 'licks', 'take', 'so', 'help', 'I', 'cute', 'him', 'garbage', 'steak', 'food', 'buying', 'has', 'how', 'dog', 'posting', 'quit', 'please', 'stupid', 'dalmation', 'is', 'stop', 'mr', 'to', 'ate', 'love', 'park', 'flea', 'my', 'maybe', 'problems']\n"
     ]
    },
    {
     "ename": "TypeError",
     "evalue": "unhashable type: 'list'",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-30-1f8d798782a1>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcreateVocabList\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpostingList\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcreateVocabList2\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpostingList\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32m<ipython-input-29-e9b6fca02e57>\u001b[0m in \u001b[0;36mcreateVocabList2\u001b[1;34m(dataSet)\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mcreateVocabList2\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataSet\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m     \u001b[0mvocabSet\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mset\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdataSet\u001b[0m\u001b[1;33m)\u001b[0m  \u001b[1;31m#create empty set\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      3\u001b[0m     \u001b[1;32mreturn\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvocabSet\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mTypeError\u001b[0m: unhashable type: 'list'"
     ]
    }
   ],
   "source": [
    "print(createVocabList(postingList))\n",
    "print(createVocabList2(postingList))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
